#
#	Generate synthetic data from data originally generated from Pickrell data
#	Called by <FDR_and_FPR_plotting.R>
#
#	Conrad Burden 21/05/14
#
	# Simulation parameters. `pi0set` is not defined in this file; it must already
	# exist in the environment of the script that sources this one.
	fractionRegulated <- 1 - pi0set   # Fraction of transcripts either up- or down-regulated (1/2 each way)
	nRepGenerated <- 24	    # Number of reps of both control and treatment in the generated synthetic data
	baseFactor <- 2		    # min factor by which treatment set is up- or down-regulated (original note: typically 1 or 1.5)
#
#	Read in estimates of mu, phi for control dataset
#
	muAndPhiFile <- "ReducedPickrelMuPhiEstimates.txt"
	# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
	muAndPhiTable <- read.table(muAndPhiFile, header = TRUE)
#
	# Columns 3 and 4 are taken as mu and phi; as.matrix() turns each into a
	# one-column matrix, so quantities derived from them keep that shape.
	mu <- as.matrix(muAndPhiTable[,3])
	phi <- as.matrix(muAndPhiTable[,4])
	nTranscripts <- length(mu)
#
#	Choose which transcripts are up- or down-regulated in the Treatment data set
#
	# Largest even number not exceeding fractionRegulated*nTranscripts, so the
	# regulated set splits into two halves of equal size.
	numberRegulated <- 2 * floor(fractionRegulated*nTranscripts/2)
	# Random draw without replacement of the transcripts to regulate.
	regulatedIndices <- sample.int(nTranscripts, numberRegulated)
	# First half of the draw is up-regulated, second half down-regulated.
	upregulatedIndices <- sort(head(regulatedIndices, numberRegulated/2))
	downregulatedIndices <- sort(tail(regulatedIndices, numberRegulated/2))
#
	# Direction flag per transcript: 1 = up, -1 = down, 0 = unchanged
	# (only used for 3rd column of output table).
	regDirection <- numeric(nTranscripts)
	regDirection[downregulatedIndices] <- -1
	regDirection[upregulatedIndices] <- 1
#
#	Factor by which genes are up- or down-regulated is set here as
#		mu -> mu*(baseFactor + X)    if up-reg
#		mu -> mu/(baseFactor + X)    if down-reg
#	where X is an exponential random variable with parameter lambda = 1
#	(the default rate of rexp). Unregulated transcripts keep a factor of 1.
#
#	NOTE(review): the original comment also said sigma^2 is scaled by the same
#	factor. The code below only scales mu and leaves phi unchanged, so the
#	resulting variance depends on how phi enters the count model - confirm intent.
#
	# Up-regulation factors are drawn first, then down-regulation factors.
	regFactor <- replace(rep(1, nTranscripts), upregulatedIndices,
				baseFactor + rexp(numberRegulated/2))
	regFactor <- replace(regFactor, downregulatedIndices,
				1/(baseFactor + rexp(numberRegulated/2)))
#
	# Treatment means are the control means times the per-transcript factor;
	# the treatment dispersion is a plain copy of the control dispersion.
	muReg <- regFactor * mu
	phiReg <- phi
#
#	Generate counts as Negative Binomial random variables with 'size' = 1/phi and
#	mean equal to the transcript mean times a per-replicate library factor
#
	# One row per transcript, one column per replicate; every row is filled below.
	controlCounts <- matrix(NA_integer_, nrow = nTranscripts, ncol = nRepGenerated)
	treatmentCounts <- matrix(NA_integer_, nrow = nTranscripts, ncol = nRepGenerated)
	# Per-replicate library scaling factors: 2 raised to a standard normal draw.
	controlLibraryFactors <- 2^rnorm(nRepGenerated)
	treatmentLibraryFactors <- 2^rnorm(nRepGenerated)
	# seq_len() gives an empty loop when there are no transcripts; 1:nTranscripts
	# would iterate over c(1, 0) and fail on the out-of-range row assignment.
	for(iTrans in seq_len(nTranscripts)){
		# Vector of nRepGenerated means: transcript mean scaled by each library factor
		muControl <- mu[iTrans]*controlLibraryFactors
		muTreatment <- muReg[iTrans]*treatmentLibraryFactors
		# Control row is drawn before the treatment row for each transcript
		controlCounts[iTrans,] <- rnbinom(nRepGenerated, size=1/phi[iTrans], mu=muControl)
		treatmentCounts[iTrans,] <- rnbinom(nRepGenerated, size=1/phiReg[iTrans], mu=muTreatment)
#
#			Alternate lines for producing Poisson data (comment out when not needed)
#		controlCounts[iTrans,] <- rpois(nRepGenerated, lambda=mu[iTrans])
#		treatmentCounts[iTrans,] <- rpois(nRepGenerated, lambda=muReg[iTrans])
		}
#
#	Place the annotation columns and the tables of counts into one data frame
#
	# Columns: first two columns of the input table, regulation direction and
	# factor, then all control replicates followed by all treatment replicates.
	syntheticDataTable <- data.frame(cbind(muAndPhiTable[,1:2], regDirection, regFactor, controlCounts, treatmentCounts))
	# Replicate column names ("ref1".., "exp1"..) are derived from the actual
	# number of count columns, so the labels stay correct for any replicate count.
	names(syntheticDataTable) <- c("ID", "RNALen", "regDirection", "regFactor",
						paste0("ref", seq_len(ncol(controlCounts))),
						paste0("exp", seq_len(ncol(treatmentCounts))))
#
	# syntheticDataFile <- paste("cnts_synt", fractionRegulated*100, "pMu1Filt1cpl_try44indiv.txt", sep ="")
	# write.table(syntheticDataTable, file=syntheticDataFile, sep = "\t", row.names=FALSE)
#
	# Print the library scaling factors drawn for each replicate
	cat("\n library scaling factors \n",
		"control:", controlLibraryFactors,
		"\n treatment:", treatmentLibraryFactors,
		"\n",
		sep = " ")
	# Print the total count per replicate (column sums of the count matrices)
	cat("\n total library sizes \n",
		"control:", colSums(controlCounts),
		"\n treatment:", colSums(treatmentCounts),
		"\n",
		sep = " ")
#


	
	
	
